NFL Offensive Line Contract Efficiency Analysis
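This analysis explores which offensive linemen provide the best value for their contracts in pass protection. Players are compared within their position groups to account for different role expectations. Note that the salary figures used below are mock values generated in the notebook from each player's position and pass-block snap count; they are not actual contract data.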
Position Key:
T = Tackle (outside blockers, highest paid, face elite pass rushers)
G = Guard (interior blockers, second highest paid)
C = Center (middle blocker, snaps ball, typically lowest paid)
In [1]:
# Import required libraries
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import warnings
import plotly.io as pio
# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')
# "notebook" serves notebook/HTML exports, "pdf" serves PDF exports.
pio.renderers.default = "notebook+pdf"

# Read the four CSV inputs from the local data/ directory.
print("Loading datasets...")
games = pl.read_csv('data/games.csv')
players = pl.read_csv('data/players.csv')
# ignore_errors=True: keep reading even if some lines yield parse errors.
plays = pl.read_csv('data/plays.csv', ignore_errors=True)
pff_scouting = pl.read_csv('data/pffScoutingData.csv')

# Report (rows, columns) of every frame as a quick sanity check.
print("Dataset shapes:")
for label, frame in (
    ("Games", games),
    ("Players", players),
    ("Plays", plays),
    ("PFF Scouting", pff_scouting),
):
    print(f"{label}: {frame.shape}")
Loading datasets... Dataset shapes: Games: (122, 7) Players: (1679, 7) Plays: (8557, 32) PFF Scouting: (188254, 15)
Data Processing
Let's calculate key metrics for each offensive lineman:
In [2]:
# NFL Offensive Line Value Analysis - Fixed Version
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import warnings
# Keep library warnings out of the cell output.
warnings.filterwarnings('ignore')

# NOTE: this repeats the dataset loading already performed in cell [1].
print("Loading datasets...")
games, players, plays, pff_scouting = (
    pl.read_csv('data/games.csv'),
    pl.read_csv('data/players.csv'),
    pl.read_csv('data/plays.csv', ignore_errors=True),
    pl.read_csv('data/pffScoutingData.csv'),
)

# Print the (rows, columns) shape of each frame.
print("Dataset shapes:")
shape_report = {
    "Games": games,
    "Players": players,
    "Plays": plays,
    "PFF Scouting": pff_scouting,
}
for title, frame in shape_report.items():
    print(f"{title}: {frame.shape}")
# 1. Keep only offensive linemen: tackles (T), guards (G) and centers (C).
o_line_positions = ['T', 'G', 'C']
is_lineman = pl.col('officialPosition').is_in(o_line_positions)
o_linemen = players.filter(is_lineman)
o_linemen_ids = o_linemen.get_column('nflId').to_list()
print(f"Found {len(o_linemen_ids)} offensive linemen")

# 2. Keep the PFF rows where one of those linemen has the 'Pass Block' role.
has_pass_block_role = pl.col('pff_role') == 'Pass Block'
is_tracked_lineman = pl.col('nflId').is_in(o_linemen_ids)
pass_block_data = pff_scouting.filter(has_pass_block_role & is_tracked_lineman)
print(f"Pass blocking data shape: {pass_block_data.shape}")
# 3. Turn the three PFF "allowed" flags into integer indicators.
pressure_flags = ['pff_hitAllowed', 'pff_hurryAllowed', 'pff_sackAllowed']
for flag in pressure_flags:
    raw_value = pl.col(flag)
    # The literal string "NA" marks a missing value; map it to null first.
    na_as_null = pl.when(raw_value == "NA").then(None).otherwise(raw_value)
    pass_block_data = pass_block_data.with_columns([
        na_as_null.alias(flag + '_clean'),
        # Non-strict cast: anything that is not an integer becomes null,
        # and every null is then counted as 0 (no pressure event).
        na_as_null.cast(pl.Int64, strict=False).fill_null(0).alias(flag + '_int'),
    ])

# A row's total pressures = hits + hurries + sacks allowed on that row.
hits_allowed = pl.col('pff_hitAllowed_int')
hurries_allowed = pl.col('pff_hurryAllowed_int')
sacks_allowed = pl.col('pff_sackAllowed_int')
pass_block_data = pass_block_data.with_columns([
    (hits_allowed + hurries_allowed + sacks_allowed).alias('total_pressures_allowed')
])
# 4. One row per lineman: number of pass-block rows and pressures given up.
player_stats = (
    pass_block_data
    .group_by('nflId')
    .agg([
        # Each row of pass_block_data counts as one pass-block snap.
        pl.count().alias('pass_block_snaps'),
        pl.col('total_pressures_allowed').sum().alias('total_pressures_allowed'),
    ])
)

# Pressure rate as a percentage of pass-block snaps.
pressure_pct = pl.col('total_pressures_allowed') / pl.col('pass_block_snaps') * 100
player_stats = player_stats.with_columns([pressure_pct.alias('pressure_rate')])

# Attach the display name and position of every lineman.
roster_columns = o_linemen.select(['nflId', 'displayName', 'officialPosition'])
player_stats = player_stats.join(roster_columns, on='nflId')
# 5. Mock salaries. No contract data is read: every salary is a fixed base per
#    position (T: 14M, G: 12M, anything else: 10M) multiplied by
#    0.5 + pass_block_snaps / 500.
snap_multiplier = 0.5 + pl.col('pass_block_snaps') / 500
base_salary = (
    pl.when(pl.col('officialPosition') == 'T').then(pl.lit(14_000_000))
    .when(pl.col('officialPosition') == 'G').then(pl.lit(12_000_000))
    .otherwise(pl.lit(10_000_000))
)
player_stats = player_stats.with_columns([
    (base_salary * snap_multiplier).alias('salary_cap_hit')
])

# Salary spread over every pass-block snap.
salary_per_snap = pl.col('salary_cap_hit') / pl.col('pass_block_snaps')
player_stats = player_stats.with_columns([salary_per_snap.alias('cost_per_snap')])

# Only players with more than 100 pass-block snaps feed the position averages.
has_enough_snaps = pl.col('pass_block_snaps') > 100
significant_players = player_stats.filter(has_enough_snaps)
print(f"Players with >100 snaps: {significant_players.shape[0]}")
# 6. Position-specific metrics.
# Per-position mean pressure rate and salary, taken over the >100-snap players.
position_avgs = (
    significant_players
    .group_by('officialPosition')
    .agg([
        pl.col('pressure_rate').mean().alias('avg_pressure_rate'),
        pl.col('salary_cap_hit').mean().alias('avg_salary'),
    ])
)
print("Position averages:")
print(position_avgs)

# Give every player row the averages of its own position.
player_stats = player_stats.join(position_avgs, on='officialPosition')

# Ratios against the position average: 1.0 means exactly average.
relative_pressure = pl.col('pressure_rate') / pl.col('avg_pressure_rate')
relative_salary = pl.col('salary_cap_hit') / pl.col('avg_salary')
player_stats = player_stats.with_columns([
    relative_pressure.alias('relative_pressure'),
    relative_salary.alias('relative_salary'),
    # Value score: 0 for an exactly average player; rises as pressure rate
    # and salary fall below the position average.
    (2 - relative_pressure - relative_salary).alias('value_score'),
])
# Snaps on which the player allowed no recorded pressure.
clean_snaps = pl.col('pass_block_snaps') - pl.col('total_pressures_allowed')

player_stats = player_stats.with_columns([
    # Salary per clean snap; left null when there are no clean snaps to divide by.
    pl.when(pl.col('total_pressures_allowed') < pl.col('pass_block_snaps'))
    .then(pl.col('salary_cap_hit') / clean_snaps)
    .otherwise(None)
    .alias('cost_per_clean_snap'),
    # Percentage of pass-block snaps without a pressure.
    (clean_snaps / pl.col('pass_block_snaps') * 100).alias('block_success_rate'),
])

# The final report is restricted to players with more than 100 pass-block snaps.
final_stats = player_stats.filter(pl.col('pass_block_snaps') > 100)
def _print_player_summary(rank, player):
    """Print the salary, pressure-rate and value-score lines shared by both rankings."""
    print(f"{rank}. {player['displayName']} - ${player['salary_cap_hit']:,.0f}")
    print(f" Pressure Rate: {player['pressure_rate']:.1f}% (vs. pos avg {player['avg_pressure_rate']:.1f}%)")
    print(f" Value Score: {player['value_score']:.2f}")


# Three best and three worst value scores within each position.
best_by_position = {}
worst_by_position = {}
for position in o_line_positions:
    candidates = final_stats.filter(pl.col('officialPosition') == position)
    if candidates.shape[0] == 0:
        continue
    best_by_position[position] = candidates.sort('value_score', descending=True).head(3)
    worst_by_position[position] = candidates.sort('value_score').head(3)

# Best-value listing; positions without qualifying players get an explicit notice.
print("\n🏆 BEST VALUE PLAYERS BY POSITION 🏆")
position_names = {'T': 'Tackle', 'G': 'Guard', 'C': 'Center'}
for position in o_line_positions:
    leaders = best_by_position.get(position)
    if leaders is None or leaders.shape[0] == 0:
        print(f"\nNo {position_names[position]}s found with sufficient data")
        continue
    print(f"\nTop {position_names[position]}s:")
    for rank, player in enumerate(leaders.iter_rows(named=True), start=1):
        _print_player_summary(rank, player)
        if player['cost_per_clean_snap'] is not None:
            print(f" Cost per clean snap: ${player['cost_per_clean_snap']:,.0f}")

# Worst-value listing; positions without data are skipped silently.
print("\n💸 WORST VALUE PLAYERS BY POSITION 💸")
for position in o_line_positions:
    trailers = worst_by_position.get(position)
    if trailers is None or trailers.shape[0] == 0:
        continue
    print(f"\nWorst {position_names[position]}s:")
    for rank, player in enumerate(trailers.iter_rows(named=True), start=1):
        _print_player_summary(rank, player)

# Summary statistics over every analysed player.
print("\n📊 SUMMARY STATISTICS 📊")
print(f"Total players analyzed: {final_stats.shape[0]}")
print(f"Average pressure rate: {final_stats['pressure_rate'].mean():.1f}%")
print(f"Average salary: ${final_stats['salary_cap_hit'].mean():,.0f}")

# Ten highest value scores regardless of position.
print("\n🌟 TOP 10 OVERALL VALUE PLAYERS 🌟")
top_overall = final_stats.sort('value_score', descending=True).head(10)
for rank, player in enumerate(top_overall.iter_rows(named=True), start=1):
    print(f"{rank:2d}. {player['displayName']} ({player['officialPosition']}) - Value: {player['value_score']:.2f}")
    print(f" ${player['salary_cap_hit']:,.0f} | {player['pressure_rate']:.1f}% pressure rate")
Loading datasets...
Dataset shapes:
Games: (122, 7)
Players: (1679, 7)
Plays: (8557, 32)
PFF Scouting: (188254, 15)
Found 276 offensive linemen
Pass blocking data shape: (42877, 15)
Players with >100 snaps: 181
Position averages:
shape: (3, 3)
┌──────────────────┬───────────────────┬────────────┐
│ officialPosition ┆ avg_pressure_rate ┆ avg_salary │
│ --- ┆ --- ┆ --- │
│ str ┆ f64 ┆ f64 │
╞══════════════════╪═══════════════════╪════════════╡
│ T ┆ 7.339147 ┆ 1.2958e7 │
│ C ┆ 4.03781 ┆ 9.5835e6 │
│ G ┆ 5.915806 ┆ 1.1332e7 │
└──────────────────┴───────────────────┴────────────┘
🏆 BEST VALUE PLAYERS BY POSITION 🏆
Top Tackles:
1. Germain Ifedi - $10,612,000
Pressure Rate: 3.9% (vs. pos avg 7.3%)
Value Score: 0.65
Cost per clean snap: $85,581
2. James Hurst - $10,528,000
Pressure Rate: 4.0% (vs. pos avg 7.3%)
Value Score: 0.65
Cost per clean snap: $87,008
3. Brian O'Neill - $14,112,000
Pressure Rate: 2.0% (vs. pos avg 7.3%)
Value Score: 0.64
Cost per clean snap: $56,675
Top Guards:
1. Andrew Norwell - $12,288,000
Pressure Rate: 1.1% (vs. pos avg 5.9%)
Value Score: 0.72
Cost per clean snap: $47,444
2. Zack Martin - $10,992,000
Pressure Rate: 2.4% (vs. pos avg 5.9%)
Value Score: 0.62
Cost per clean snap: $54,148
3. Michael Schofield - $8,616,000
Pressure Rate: 3.7% (vs. pos avg 5.9%)
Value Score: 0.62
Cost per clean snap: $82,057
Top Centers:
1. Rodney Hudson - $7,860,000
Pressure Rate: 0.0% (vs. pos avg 4.0%)
Value Score: 1.18
Cost per clean snap: $54,965
2. Max Garcia - $7,320,000
Pressure Rate: 1.7% (vs. pos avg 4.0%)
Value Score: 0.81
Cost per clean snap: $64,211
3. Alex Mack - $9,360,000
Pressure Rate: 1.4% (vs. pos avg 4.0%)
Value Score: 0.68
Cost per clean snap: $43,535
💸 WORST VALUE PLAYERS BY POSITION 💸
Worst Tackles:
1. Alejandro Villanueva - $14,056,000
Pressure Rate: 13.5% (vs. pos avg 7.3%)
Value Score: -0.92
2. Jesse Davis - $15,176,000
Pressure Rate: 12.7% (vs. pos avg 7.3%)
Value Score: -0.90
3. Austin Jackson - $15,120,000
Pressure Rate: 12.1% (vs. pos avg 7.3%)
Value Score: -0.81
Worst Guards:
1. Cody Ford - $8,952,000
Pressure Rate: 14.6% (vs. pos avg 5.9%)
Value Score: -1.26
2. Jalen Mayfield - $12,864,000
Pressure Rate: 10.1% (vs. pos avg 5.9%)
Value Score: -0.85
3. Nate Davis - $12,336,000
Pressure Rate: 10.2% (vs. pos avg 5.9%)
Value Score: -0.82
Worst Centers:
1. Garrett Bradbury - $10,080,000
Pressure Rate: 7.9% (vs. pos avg 4.0%)
Value Score: -1.00
2. Matt Skura - $7,840,000
Pressure Rate: 7.7% (vs. pos avg 4.0%)
Value Score: -0.74
3. Tyler Biadasz - $10,220,000
Pressure Rate: 6.1% (vs. pos avg 4.0%)
Value Score: -0.58
📊 SUMMARY STATISTICS 📊
Total players analyzed: 181
Average pressure rate: 6.2%
Average salary: $11,695,403
🌟 TOP 10 OVERALL VALUE PLAYERS 🌟
1. Rodney Hudson (C) - Value: 1.18
$7,860,000 | 0.0% pressure rate
2. Max Garcia (C) - Value: 0.81
$7,320,000 | 1.7% pressure rate
3. Andrew Norwell (G) - Value: 0.72
$12,288,000 | 1.1% pressure rate
4. Alex Mack (C) - Value: 0.68
$9,360,000 | 1.4% pressure rate
5. Germain Ifedi (T) - Value: 0.65
$10,612,000 | 3.9% pressure rate
6. James Hurst (T) - Value: 0.65
$10,528,000 | 4.0% pressure rate
7. Brian O'Neill (T) - Value: 0.64
$14,112,000 | 2.0% pressure rate
8. Zack Martin (G) - Value: 0.62
$10,992,000 | 2.4% pressure rate
9. Michael Schofield (G) - Value: 0.62
$8,616,000 | 3.7% pressure rate
10. Jermaine Eluemunor (G) - Value: 0.61
$10,032,000 | 3.0% pressure rate
Position-Specific Value Analysis
We're now comparing players only to others at the same position, addressing the issue that different positions have different baseline expectations for performance and salary.
VISUALIZATION 1: Position Value Matrix
In [3]:
# Position Value Matrix: one scatter panel per position. Every lineman with
# more than 100 pass-block snaps is plotted by salary and pressure rate
# relative to the position average (1.0 on both axes).


def _annotate_extreme_player(fig, quadrant, col, color, ax, ay):
    """Label the player in ``quadrant`` that lies farthest from the (1, 1) average.

    Args:
        fig: Subplot figure that receives the annotation.
        quadrant: Players already restricted to one quadrant of the panel.
        col: 1-based subplot column of the panel.
        color: Colour of the arrow, border and text.
        ax: Horizontal pixel offset of the label from the marker.
        ay: Vertical pixel offset of the label from the marker.

    Does nothing when ``quadrant`` has no rows.
    """
    if quadrant.height == 0:
        return
    # Euclidean distance from the position-average reference point (1, 1).
    distance = (
        (pl.col('relative_salary') - 1) ** 2
        + (pl.col('relative_pressure') - 1) ** 2
    ).sqrt()
    extreme = (
        quadrant.with_columns([distance.alias('distance_from_avg')])
        .sort('distance_from_avg', descending=True)
        .row(0, named=True)
    )
    fig.add_annotation(
        x=extreme['relative_salary'],
        y=extreme['relative_pressure'],
        # Last name only, to keep the label compact.
        text=extreme['displayName'].split()[-1],
        showarrow=True,
        arrowhead=1,
        arrowcolor=color,
        bgcolor="white",
        bordercolor=color,
        font=dict(color=color, size=9, family="Arial"),
        ax=ax, ay=ay,
        row=1, col=col
    )


# Create subplots: 1 row, 3 columns
fig1 = make_subplots(
    rows=1, cols=3,
    subplot_titles=("Tackles (T)", "Guards (G)", "Centers (C)"),
    horizontal_spacing=0.1
)
position_colors = {'T': 'blue', 'G': 'green', 'C': 'orange'}

# Quadrant text labels (no background colours); identical for every panel.
quadrant_labels = [
    {"x": 0.6, "y": 0.7, "text": "BEST VALUE", "color": "green"},
    {"x": 1.4, "y": 0.7, "text": "SOLID", "color": "darkgreen"},
    {"x": 0.6, "y": 1.3, "text": "BUDGET", "color": "orange"},
    {"x": 1.4, "y": 1.3, "text": "POOR VALUE", "color": "red"}
]

for i, pos in enumerate(o_line_positions, 1):
    pos_players = player_stats.filter(
        (pl.col('officialPosition') == pos) &
        (pl.col('pass_block_snaps') > 100)
    )
    snap_counts = pos_players['pass_block_snaps'].to_list()

    # Skip the scatter when the position has no qualifying players:
    # max() over an empty list would raise ValueError.
    if snap_counts:
        fig1.add_trace(
            go.Scatter(
                x=pos_players['relative_salary'].to_list(),
                y=pos_players['relative_pressure'].to_list(),
                mode='markers',
                marker=dict(
                    color=position_colors[pos],
                    # Marker area scales with pass-block snaps.
                    size=snap_counts,
                    sizemode='area',
                    sizeref=2.*max(snap_counts)/(20.**2),
                    sizemin=4
                ),
                text=pos_players['displayName'].to_list(),
                hovertemplate=(
                    "<b>%{text}</b><br>" +
                    "Relative Salary: %{x:.2f}x<br>" +
                    "Relative Pressure: %{y:.2f}x<br>" +
                    "Actual Salary: $%{customdata[0]:,.0f}<br>" +
                    "Pressure Rate: %{customdata[1]:.1f}%"
                ),
                customdata=np.column_stack((
                    pos_players['salary_cap_hit'].to_list(),
                    pos_players['pressure_rate'].to_list()
                )),
                name=pos
            ),
            row=1, col=i
        )

    # Reference lines at 1.0 (position average)
    fig1.add_vline(x=1, line_dash="dash", line_color="gray", row=1, col=i)
    fig1.add_hline(y=1, line_dash="dash", line_color="gray", row=1, col=i)

    for label in quadrant_labels:
        fig1.add_annotation(
            x=label["x"],
            y=label["y"],
            text=label["text"],
            showarrow=False,
            font=dict(color=label["color"], size=10, family="Arial Black"),
            row=1, col=i
        )

    # Best value: below-average salary AND below-average pressure.
    best_value_quadrant = pos_players.filter(
        (pl.col('relative_salary') < 1) &
        (pl.col('relative_pressure') < 1)
    )
    _annotate_extreme_player(fig1, best_value_quadrant, i, "green", 30, -30)

    # Worst value: above-average salary AND above-average pressure.
    worst_value_quadrant = pos_players.filter(
        (pl.col('relative_salary') > 1) &
        (pl.col('relative_pressure') > 1)
    )
    _annotate_extreme_player(fig1, worst_value_quadrant, i, "red", -30, 30)
# Axis titles per panel: the x title sits under the middle plot (Guards),
# the y title beside the first plot (Tackles); every other axis is untitled.
axis_titles = {
    1: (None, "Relative Pressure (1.0 = Position Avg)"),
    2: ("Relative Salary (1.0 = Position Avg)", None),
    3: (None, None),
}
for panel, (x_title, y_title) in axis_titles.items():
    # Ticks every 0.5, shown with one decimal place.
    fig1.update_xaxes(title_text=x_title, row=1, col=panel, dtick=0.5, tickformat='.1f')
    fig1.update_yaxes(title_text=y_title, row=1, col=panel, dtick=0.5, tickformat='.1f')

# Overall figure layout.
figure_title = dict(
    text="Position-Specific Value Analysis: Finding the Best Players in Each Role",
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
fig1.update_layout(
    title=figure_title,
    height=550,
    width=1200,
    showlegend=False,
    margin=dict(b=150),
    plot_bgcolor='white',
)

# Quadrant legend below the panels.
quadrant_legend = (
    "<b>Quadrants:</b> BEST VALUE = low salary, low pressure | "
    "SOLID VALUE = high salary, low pressure<br>"
    "POOR VALUE = high salary, high pressure | "
    "BUDGET OPTION = low salary, high pressure"
)
fig1.add_annotation(
    x=0.5, y=-0.3,
    xref='paper', yref='paper',
    text=quadrant_legend,
    showarrow=False,
    font=dict(size=12),
    align='center',
    bgcolor="white",
    bordercolor="black",
    borderwidth=1,
)

# Compact key insight above the panels.
fig1.add_annotation(
    x=0.5, y=1.15,
    xref='paper', yref='paper',
    text="Key Insight: Player value must be assessed within position context",
    showarrow=False,
    font=dict(size=14, color="black"),
    align='center',
    bgcolor="rgba(255,255,0,0.2)",
    bordercolor="black",
    borderwidth=1,
)

fig1.show()
VISUALIZATION 2: Position Value Box Plot
In [4]:
# VISUALIZATION 2: Position Value Box Plot
# Shows value distribution by position with notable outliers

# Players with more than 150 pass-block snaps feed both the averages and the plot.
eligible_players = player_stats.filter(pl.col('pass_block_snaps') > 150)

# Per-position mean success rate and salary among those players.
position_avg_metrics = (
    eligible_players
    .group_by('officialPosition')
    .agg([
        pl.col('block_success_rate').mean().alias('pos_avg_success'),
        pl.col('salary_cap_hit').mean().alias('pos_avg_salary'),
    ])
)
value_analysis = eligible_players.join(position_avg_metrics, on='officialPosition')

# Position-adjusted value (higher is better): success relative to peers
# divided by salary relative to peers.
success_vs_peers = pl.col('block_success_rate') / pl.col('pos_avg_success')
salary_vs_peers = pl.col('salary_cap_hit') / pl.col('pos_avg_salary')
value_analysis = value_analysis.with_columns([
    (success_vs_peers / salary_vs_peers).alias('position_value')
])

# Flatten to a list of plain dicts, grouped in o_line_positions order.
boxplot_data = []
position_labels = {'T': 'Tackles', 'G': 'Guards', 'C': 'Centers'}
for pos in o_line_positions:
    position_rows = value_analysis.filter(pl.col('officialPosition') == pos)
    boxplot_data.extend(
        {
            'Position': position_labels[pos],
            'Value Score': player['position_value'],
            'Player': player['displayName'],
            'Salary': player['salary_cap_hit'],
            'Success Rate': player['block_success_rate'],
        }
        for player in position_rows.iter_rows(named=True)
    )
# Box plot of position-adjusted value, one box per position.
fig2 = go.Figure()
box_colors = {'Tackles': 'blue', 'Guards': 'green', 'Centers': 'orange'}

# Group the flattened rows by position label, keeping o_line_positions order.
entries_by_position = {
    position_labels[pos]: [
        entry for entry in boxplot_data if entry['Position'] == position_labels[pos]
    ]
    for pos in o_line_positions
}

for pos_name, entries in entries_by_position.items():
    if not entries:
        continue
    fig2.add_trace(go.Box(
        y=[entry['Value Score'] for entry in entries],
        name=pos_name,
        boxpoints='all',  # draw every player next to the box
        jitter=0.3,
        pointpos=-1.8,
        marker=dict(color=box_colors[pos_name], size=8),
        boxmean=True,  # also mark the mean
        text=[entry['Player'] for entry in entries],  # player names for the tooltip
        hovertemplate="%{text}<br>Value: %{y:.2f}",
    ))

# Horizontal reference line at value = 1.0 (average).
fig2.add_hline(y=1.0, line_dash="dash", line_color="gray")

# Highest (green) and lowest (red) value player of every position.
annotated_players = []
for entries in entries_by_position.values():
    if not entries:
        continue
    ranked = sorted(entries, key=lambda entry: entry['Value Score'])
    annotated_players.append((ranked[-1], "green"))
    annotated_players.append((ranked[0], "red"))

for player, color in annotated_players:
    is_top = color == "green"
    fig2.add_annotation(
        x=player['Position'],
        y=player['Value Score'],
        text=f"{player['Player']}<br>${player['Salary']/1000000:.1f}M",
        showarrow=True,
        arrowhead=1,
        arrowcolor=color,
        font=dict(color=color, size=10),
        bgcolor="white",
        bordercolor=color,
        # Top players are labelled up and to the right, bottom players down and to the left.
        ax=30 if is_top else -30,
        ay=-30 if is_top else 30,
    )

# Figure layout.
fig2.update_layout(
    title=(
        "Position Value Distribution<br>"
        "<sub>Shows how player value (performance relative to salary) is distributed by position</sub>"
    ),
    xaxis_title="Position",
    yaxis_title="Value Score (higher is better)",
    height=600,
    width=1000,
    showlegend=False,
)

# Explanation below the plot.
fig2.add_annotation(
    x=0.5, y=-0.19,
    xref="paper", yref="paper",
    text=(
        "<b>Key Insight:</b> This visualization shows how value varies within each position group.<br>"
        "Higher scores indicate players who outperform their position peers relative to their salary."
    ),
    showarrow=False,
    font=dict(size=14),
    align="center",
    bgcolor="rgba(255,255,0,0.2)",
    bordercolor="black",
    borderwidth=1,
)

fig2.show()
VISUALIZATION 3: Market Value Analysis
In [5]:
# VISUALIZATION 3: Market Value Analysis
# Shows which players outperform or underperform their salary level

# Players with more than 150 pass-block snaps, with salary expressed in millions.
# block_success_rate is already a percentage (0-100) and is used as-is.
significant_players = player_stats.filter(pl.col('pass_block_snaps') > 150).with_columns([
    (pl.col('salary_cap_hit') / 1_000_000).alias('salary_millions')
])

# Global salary range shared by all positions.
min_salary = significant_players['salary_millions'].min()
max_salary = significant_players['salary_millions'].max()

# Setup position info
position_colors = {'T': 'blue', 'G': 'green', 'C': 'orange'}
position_names = {'T': 'Tackles', 'G': 'Guards', 'C': 'Centers'}

# Seven equal-width salary tiers over the global range (eight edges).
# The edges do not depend on the position, so they are computed once.
salary_tiers = np.linspace(min_salary, max_salary, 8)
tier_bounds = list(zip(salary_tiers[:-1], salary_tiers[1:]))
last_tier = len(tier_bounds) - 1

# For each position, fit a straight line through the median success rate of
# every populated salary tier. position_curves[pos] holds (slope, intercept).
position_curves = {}
for pos in position_colors:
    pos_players = significant_players.filter(pl.col('officialPosition') == pos)

    tier_medians = []
    for tier_index, (low, high) in enumerate(tier_bounds):
        in_tier = pl.col('salary_millions') >= low
        if tier_index == last_tier:
            # The top tier is closed on the right; with a strict '<' the
            # highest-paid player (salary == max_salary) lands in no tier.
            in_tier = in_tier & (pl.col('salary_millions') <= high)
        else:
            in_tier = in_tier & (pl.col('salary_millions') < high)
        tier_players = pos_players.filter(in_tier)
        if len(tier_players) > 0:
            tier_medians.append({
                'salary': (low + high) / 2,  # tier midpoint
                'performance': tier_players['block_success_rate'].median()
            })

    # Only fit a line with at least three populated tiers; positions with
    # fewer get no entry in position_curves.
    if len(tier_medians) >= 3:
        salaries = [tier['salary'] for tier in tier_medians]
        performances = [tier['performance'] for tier in tier_medians]
        fitted = np.polyfit(salaries, performances, 1)
        if fitted[0] < 0:
            # Force a non-negative slope: a falling fit is replaced by a flat
            # line at the mean of the tier medians.
            z = [0, np.mean(performances)]
        else:
            z = fitted
        position_curves[pos] = z
# Create the visualization: per position, a dashed expected-performance line,
# the players as markers sized by their distance from that line, and labels
# for the two best and two worst deviations.
fig3 = go.Figure()

n_highlight = 2
for pos in position_colors:
    curve = position_curves.get(pos)
    if curve is None:
        # No fitted line for this position, so there is no expectation to
        # compare against. Reading the curve unconditionally would raise
        # NameError or silently reuse the previous position's line.
        continue
    slope, intercept = float(curve[0]), float(curve[1])

    # Expected-performance line across the FULL salary range.
    salary_range = np.linspace(min_salary, max_salary, 100)
    fig3.add_trace(go.Scatter(
        x=salary_range,
        y=slope * salary_range + intercept,
        mode='lines',
        name=position_names[pos],
        line=dict(color=position_colors[pos], dash='dash'),
        hovertemplate=f"{position_names[pos]}<br>$%{{x:.1f}}M: %{{y:.1f}}% expected",
        showlegend=False
    ))

    # Position name at the right-hand end of the line.
    fig3.add_annotation(
        x=max_salary,
        y=slope * max_salary + intercept,
        text=position_names[pos],
        showarrow=False,
        xanchor="left",
        xshift=10,
        font=dict(color=position_colors[pos])
    )

    # Deviation = actual success rate minus the line's value at the player's
    # salary. Stored as a column so sorting and labelling share one value.
    expected_rate = pl.col('salary_millions') * slope + intercept
    pos_players = (
        significant_players
        .filter(pl.col('officialPosition') == pos)
        .with_columns([
            (pl.col('block_success_rate') - expected_rate).alias('deviation')
        ])
    )
    deviation = pos_players['deviation']

    # Player markers.
    fig3.add_trace(go.Scatter(
        x=pos_players['salary_millions'].to_list(),
        y=pos_players['block_success_rate'].to_list(),
        mode='markers',
        name=position_names[pos],
        marker=dict(
            color=position_colors[pos],
            # Size grows with |deviation|; 8 is the base size.
            size=np.abs(deviation.to_numpy()) * 2 + 8,
            line=dict(color='white', width=1)
        ),
        text=pos_players['displayName'].to_list(),
        customdata=np.stack((
            deviation.to_list(),
            pos_players['pass_block_snaps'].to_list()
        ), axis=-1),
        hovertemplate=(
            "<b>%{text}</b><br>" +
            "Salary: $%{x:.1f}M<br>" +
            "Success Rate: %{y:.1f}%<br>" +
            "vs Expected: %{customdata[0]:.1f}%<br>" +
            "Snaps: %{customdata[1]:.0f}"
        ),
        showlegend=False
    ))

    # Highlight the largest positive and negative deviations.
    best_value = pos_players.sort('deviation', descending=True).head(n_highlight)
    worst_value = pos_players.sort('deviation').head(n_highlight)

    for player in best_value.iter_rows(named=True):
        fig3.add_annotation(
            x=player['salary_millions'],
            y=player['block_success_rate'],
            # '+' format flag: a negative value prints as "-x", never "+-x".
            text=f"{player['displayName']}<br>{player['deviation']:+.1f}%",
            showarrow=True,
            arrowhead=1,
            arrowcolor="green",
            font=dict(color="green", size=10),
            bgcolor="white",
            bordercolor="green",
            ax=40,
            ay=-40
        )

    for player in worst_value.iter_rows(named=True):
        fig3.add_annotation(
            x=player['salary_millions'],
            y=player['block_success_rate'],
            text=f"{player['displayName']}<br>{player['deviation']:.1f}%",
            showarrow=True,
            arrowhead=1,
            arrowcolor="red",
            font=dict(color="red", size=10),
            bgcolor="white",
            bordercolor="red",
            ax=-40,
            ay=40
        )
# Figure layout: the x axis is pinned to the global salary range.
layout_options = {
    'title': (
        "Position-Specific Value Analysis<br>"
        "<sub>Dashed lines show expected performance based on salary level</sub>"
    ),
    'xaxis': {
        'title': "Salary",
        'tickformat': "$.1f",
        'ticksuffix': "M",
        'range': [min_salary, max_salary],
    },
    'yaxis': {
        'title': "Pass Block Success Rate (%)",
        'tickformat': ".1f",
        'ticksuffix': "%",
    },
    'height': 700,
    'width': 1000,
    'showlegend': False,
}
fig3.update_layout(**layout_options)

# Insight box below the plot.
insight_text = (
    "<b>Key Insight:</b> Dashed lines show expected performance at each salary level.<br>"
    "Players above their position's line are outperforming their pay grade, "
    "with marker size showing the magnitude."
)
fig3.add_annotation(
    x=0.5, y=-0.15,
    xref='paper', yref='paper',
    text=insight_text,
    showarrow=False,
    font=dict(size=14),
    align="center",
    bgcolor="rgba(255,255,0,0.2)",
    bordercolor="black",
    borderwidth=1,
)

fig3.show()
VISUALIZATION 4: Contract Decisions Dashboard
In [6]:
# VISUALIZATION 4: Contract Decisions Dashboard
# Shows best and worst value players at each position

# Every selection below is limited to players with more than 150 pass-block snaps.
proven_players = player_stats.filter(pl.col('pass_block_snaps') > 150)

# Two best and two worst value scores per position.
best_value_all = []
worst_value_all = []
for pos in o_line_positions:
    pos_players = proven_players.filter(pl.col('officialPosition') == pos)
    if len(pos_players) > 0:
        best_value_all.append(pos_players.sort('value_score', descending=True).head(2))
        worst_value_all.append(pos_players.sort('value_score').head(2))

# pl.concat raises on an empty list, so fall back to an empty frame that
# keeps the schema when no position has qualifying players.
best_value_combined = pl.concat(best_value_all) if best_value_all else proven_players.head(0)
worst_value_combined = pl.concat(worst_value_all) if worst_value_all else proven_players.head(0)

# Mean salary of the "neutral value" players (|value_score| <= 0.2) per
# position. The mean is None when a position has no such player.
position_avg_salaries = {}
for pos in o_line_positions:
    average_players = proven_players.filter(
        (pl.col('officialPosition') == pos) &
        (pl.col('value_score').abs() <= 0.2)
    )
    position_avg_salaries[pos] = average_players['salary_cap_hit'].mean()

# Estimated saving from replacing each worst-value player; one entry per row
# of worst_value_combined, in the same order, never negative.
savings = []
for row in worst_value_combined.iter_rows(named=True):
    current_salary = row['salary_cap_hit']
    current_performance = row['block_success_rate']

    # Replacement candidates: same position, cheaper, and at least 95% of the
    # current player's success rate.
    replacement_players = proven_players.filter(
        (pl.col('officialPosition') == row['officialPosition']) &
        (pl.col('block_success_rate') >= current_performance * 0.95) &
        (pl.col('salary_cap_hit') < current_salary)
    )

    if len(replacement_players) > 0:
        replacement_salary = replacement_players['salary_cap_hit'].median()
        saving = current_salary - replacement_salary
    else:
        # No clear replacement: use the neutral-value salary scaled by how the
        # player compares with the position's average success rate.
        # block_success_rate is 100 - pressure_rate, so that average is
        # 100 - avg_pressure_rate. (There is no 'avg_block_success_rate'
        # column; reading it raised KeyError.)
        pos_avg_salary = position_avg_salaries[row['officialPosition']]
        pos_avg_success = 100 - row['avg_pressure_rate']
        if pos_avg_salary is None or pos_avg_success == 0:
            # Nothing to compare against, so claim no saving.
            saving = 0
        else:
            performance_ratio = current_performance / pos_avg_success
            saving = current_salary - (pos_avg_salary * performance_ratio)

    savings.append(max(0, saving))
# Two side-by-side panels: players to extend (left) and to cut/replace (right).
fig4 = make_subplots(
    rows=1, cols=2,
    column_widths=[0.5, 0.5],
    subplot_titles=("Players to Extend (Best Value)", "Players to Cut/Replace (Worst Value)"),
    horizontal_spacing=0.15,
)

# Position colors and names
position_colors = {'T': 'blue', 'G': 'green', 'C': 'orange'}
position_names = {'T': 'Tackles', 'G': 'Guards', 'C': 'Centers'}

# Left panel: one horizontal bar per best-value player.
for player in best_value_combined.iter_rows(named=True):
    position = player['officialPosition']
    salary_line = f"${player['salary_cap_hit']:,.0f}"
    pressure_line = f"{player['pressure_rate']:.1f}%"
    best_bar = go.Bar(
        x=[player['value_score']],
        y=[f"{player['displayName']} ({position})"],
        orientation='h',
        marker_color=position_colors[position],
        text=f"{salary_line} | {pressure_line}",
        textposition='auto',
        name=position,
        showlegend=False,
        hovertemplate=(
            "<b>%{y}</b><br>" +
            "Value Score: %{x:.2f}<br>" +
            f"Salary: {salary_line}<br>" +
            f"Pressure Rate: {pressure_line}"
        ),
    )
    fig4.add_trace(best_bar, row=1, col=1)

# Right panel: worst-value players, each paired with its estimated saving.
for player, saving in zip(worst_value_combined.iter_rows(named=True), savings):
    position = player['officialPosition']
    salary_line = f"${player['salary_cap_hit']:,.0f}"
    pressure_line = f"{player['pressure_rate']:.1f}%"
    saving_line = f"${saving:,.0f}"
    worst_bar = go.Bar(
        x=[player['value_score']],
        y=[f"{player['displayName']} ({position})"],
        orientation='h',
        marker_color=position_colors[position],
        text=f"{salary_line} | PR: {pressure_line} | Save: {saving_line}",
        textposition='auto',
        name=position,
        showlegend=False,
        hovertemplate=(
            "<b>%{y}</b><br>" +
            "Value Score: %{x:.2f}<br>" +
            f"Current Salary: {salary_line}<br>" +
            f"Pressure Rate: {pressure_line}<br>" +
            f"Potential Savings: {saving_line}"
        ),
    )
    fig4.add_trace(worst_bar, row=1, col=2)
# Figure layout; the enlarged bottom margin leaves room for the salary boxes.
fig4.update_layout(
    title_text="Contract Decisions: Players to Extend vs. Cut",
    height=600,
    width=1400,
    showlegend=False,
    margin=dict(l=50, r=50, t=100, b=120),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# Both panels share the same light-gray grid styling.
grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
fig4.update_xaxes(
    title_text="Value Score (higher is better)",
    row=1, col=1,
    autorange=True,
    **grid_style
)
fig4.update_xaxes(
    title_text="Value Score (lower is worse)",
    row=1, col=2,
    autorange=True,
    **grid_style
)
fig4.update_yaxes(title_text="Player", row=1, col=1, **grid_style)
fig4.update_yaxes(title_text="", row=1, col=2, **grid_style)

# Total of the per-player savings estimates.
total_savings = sum(savings)

# Headline annotation with the total savings.
fig4.add_annotation(
    x=1, y=1.15,
    xref='paper',
    yref='paper',
    text=f"Total potential savings of ${total_savings:,.0f} by replacing underperforming players with average value performers",
    showarrow=False,
    font=dict(size=14, color="red"),
    align='center',
    bgcolor="rgba(255,255,255,0.8)",
    bordercolor="red",
    borderwidth=1,
    borderpad=4
)

# One box per position with the mean salary of its neutral-value players.
for i, (pos, avg_salary) in enumerate(position_avg_salaries.items()):
    x_pos = 0.15 + (i * 0.3)
    # The mean is None when a position has no neutral-value player;
    # formatting None with ',.0f' raises TypeError.
    salary_text = "n/a" if avg_salary is None else f"${avg_salary:,.0f}"
    fig4.add_annotation(
        x=x_pos,
        y=-0.25,
        xref='paper',
        yref='paper',
        text=f"Average {position_names[pos]} Salary (Value ≈ 0):<br>{salary_text}",
        showarrow=False,
        font=dict(size=12, color=position_colors[pos]),
        align='center',
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor=position_colors[pos],
        borderwidth=1,
        borderpad=4
    )

fig4.show()
In [ ]: